Flowchart

library(GENIE3)
library(doParallel)
library(igraph)
library(tidyverse)
library(DT)
library(reticulate)
library(learn2count)
library(rbenchmark)
library(reshape2)
library(gridExtra)
library(DiagrammeR)
library(pROC)

use_python("/usr/bin/python3", required = TRUE)
arboreto <- import("arboreto.algo")
pandas <- import("pandas")
numpy <- import("numpy")
source("dropo.R")
source("generate_adjacency.R")
source("simmetric.R")
source("pscores.R")
source("plotg.R")
source("compare_consensus.R")
source("earlyj.R")
source("plotROC.R")
source("cutoff_adjacency.R")
source("infer_networks.R")
DiagrammeR::grViz("
digraph biological_workflow {
  # Set up the graph attributes
  graph [layout = dot, rankdir = TB]

  # Define consistent node styles
  node [shape = rectangle, style = filled, color = lightblue, fontsize = 12]

  # Define nodes for each step
  StartNode [label = 'Ground Thruth - String Regulatory Network', shape = oval, color = seagreen, fontcolor = black]
  AdjacencyMatrix [label = 'Thruth Adjacency Matrix', shape = rectangle, color = seagreen]
  SimulateData [label = 'Simulate Single-Cell Data', shape = rectangle, color = goldenrod]

  # Reconstruction using Three Packages
  LateIntegration [label = 'Late\nIntegration', shape = oval, color = khaki]
  EarlyIntegration [label = 'Early\nIntegration', shape = oval, color = khaki]
  Jointanalysis [label = 'Joint\nanalysis', shape = oval, color = khaki]
  

  # Process 
  earlyj [label = 'earlyj.R', shape=diamond, color=lightblue, fontcolor=black]
  networkinference [label = 'infer_networks.R\nGENIE3\nGRNBoost2\nJRF', shape = rectangle, color = goldenrod, fontcolor=black]
  simmetric [label = 'simmetric.R', shape = rectangle, color = goldenrod, fontcolor=black]
  plotROC [label = 'plotROC.R', shape=diamond, color=lightblue, fontcolor=black]
  generateadjacency [label='generate_adjacency.R\nWeighted Adjacency', shape=rectangle, color=goldenrod, fontcolor=black]
  cutoffadjacency [label='cutoff_adjacency.R\nBinary Adjacency', shape=rectangle, color=goldenrod, fontcolor=black]
  pscores [label='pscores.R\nTPR\nFPR\nF1\nAccuracy\nPrecision', shape=diamond, color=lightblue, fontcolor=black]
  voting [label='Edges voting', shape=diamond, color=lightblue, fontcolor=black]
  plotgcompare  [label='plotg.R\ncompare_consesus.R\nPlot Graphs', shape=rectangle, color=goldenrod, fontcolor=black]

  # Define the workflow structure
  StartNode -> AdjacencyMatrix
  AdjacencyMatrix -> SimulateData
  SimulateData -> LateIntegration
  SimulateData -> EarlyIntegration
  SimulateData -> Jointanalysis
  EarlyIntegration -> earlyj
  earlyj -> networkinference
  LateIntegration -> networkinference
  Jointanalysis -> networkinference
  networkinference -> simmetric
  simmetric -> plotROC
  simmetric -> generateadjacency
  generateadjacency -> cutoffadjacency
  cutoffadjacency -> pscores
  cutoffadjacency -> voting
  voting -> plotgcompare
}
")

Tcell Ground Truth

adjm <- read.table("./../data/adjacency_matrix.csv", header = T, row.names = 1, sep = ",") %>% as.matrix()


adjm %>%
    datatable(extensions = 'Buttons',
            options = list(
              dom = 'Bfrtip',
              buttons = c('csv', 'excel'),
              scrollX = TRUE,
              pageLength = 10), 
            caption = "Ground Truth")
gtruth <- igraph::graph_from_adjacency_matrix(adjm, mode = "undirected", diag = F)

num_nodes <- vcount(gtruth)
num_edges <- ecount(gtruth)

set.seed(1234)
plot(gtruth, 
     main = paste("Ground Truth\nNodes:", num_nodes, "Edges:", num_edges),
     vertex.label.color = "black",
     vertex.size = 6, 
     edge.width = 2, 
     vertex.label = NA,
     vertex.color = "steelblue",
     layout = igraph::layout_with_fr)

Simulate Data

ncell <- 500
nodes <- nrow(adjm)

set.seed(1130)
mu_values <- c(1.5, 3, 5)

count_matrices <- lapply(1:3, function(i) {
  set.seed(1130 + i)
  mu_i <- mu_values[i]
  
  count_matrix_i <- simdata(n = ncell, p = nodes, B = adjm, family = "ZINB", 
                            mu = mu_i, mu_noise = 1, theta = 1, pi = 0.2)
  
  count_matrix_df <- as.data.frame(count_matrix_i)
  colnames(count_matrix_df) <- colnames(adjm)
  rownames(count_matrix_df) <- paste("cell", 1:nrow(count_matrix_df), sep = "")
  
  return(count_matrix_df)
})

count_matrices[[1]] %>%
    datatable(extensions = 'Buttons',
            options = list(
              dom = 'Bfrtip',
              buttons = c('csv', 'excel'),
              scrollX = TRUE,
              pageLength = 10), 
            caption = "Simulated count matrix")
saveRDS(count_matrices, "./../analysis/count_matrices.RDS")

Matrices Integration

Late Integration

GENIE3

set.seed(1234)
genie3_late <- infer_networks(count_matrices, method="GENIE3")
saveRDS(genie3_late, "./../analysis/genie3_late.RDS")

genie3_late[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 output")

Simmetric Output and ROC

sgenie3_late <- simmetric(genie3_late, weight_function = "mean")
plotROC(sgenie3_late, adjm, plot_title = "ROC curve - GENIE3 Late Integration")

sgenie3_late[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 simmetric output")

Generate Adjacency and Apply Cutoff

sgenie3_late_wadj <- generate_adjacency(sgenie3_late)
sgenie3_late_adj <- cutoff_adjacency(count_matrices = count_matrices,
                 weighted_adjm_list = sgenie3_late_wadj, 
                 n = 2,
                 method = "GENIE3")
## Matrix 1 Mean 95th Percentile Cutoff: 0.01 
## Matrix 2 Mean 95th Percentile Cutoff: 0.01 
## Matrix 3 Mean 95th Percentile Cutoff: 0.01

sgenie3_late_wadj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 weight adjacency")
sgenie3_late_adj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 adjacency")

Comparison with the Ground Truth

scores <- pscores(adjm, sgenie3_late_adj)

scores$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
plots <- plotg(sgenie3_late_adj)

ajm_compared <- compare_consensus(sgenie3_late_adj, adjm)

GRNBoost2

set.seed(1234)
grnb_late <- infer_networks(count_matrices, method="GRNBoost2")
saveRDS(grnb_late, "./../analysis/grnb_late.RDS")

grnb_late[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 output")

Simmetric Output and ROC

sgrnb_late <- simmetric(grnb_late, weight_function = "mean")
plotROC(sgrnb_late, adjm, plot_title = "ROC curve - GRNBoost2 Late Integration")

sgrnb_late[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 simmetric output")

Generate Adjacency and Apply Cutoff

sgrnb_late_wadj <- generate_adjacency(sgrnb_late)
sgrnb_late_adj <- cutoff_adjacency(count_matrices = count_matrices,
                 weighted_adjm_list = sgrnb_late_wadj, 
                 n = 2,
                 method = "GRNBoost2")
## Matrix 1 Mean 95th Percentile Cutoff: 0.989 
## Matrix 2 Mean 95th Percentile Cutoff: 0.962 
## Matrix 3 Mean 95th Percentile Cutoff: 0.93

sgrnb_late_wadj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 weight adjacency")
sgrnb_late_adj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 adjacency")

Comparison with the Ground Truth

scores <- pscores(adjm, sgrnb_late_adj)

scores$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
plots <- plotg(sgrnb_late_adj)

ajm_compared <- compare_consensus(sgrnb_late_adj, adjm)

Early Integration

early_matrix <- list(earlyj(count_matrices))

GENIE3

set.seed(1234)
genie3_early <- infer_networks(early_matrix, method="GENIE3")

saveRDS(genie3_early, "./../analysis/genie3_early.RDS")

genie3_early[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 output")

Simmetric Output and ROC

sgenie3_early <- simmetric(list(genie3_early[[1]]), weight_function = "mean")
plotROC(sgenie3_early, adjm, plot_title = "ROC curve - GENIE3 Early Integration")

sgenie3_early[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 simmetric output")

Generate Adjacency and Apply Cutoff

sgenie3_early_wadj <- generate_adjacency(sgenie3_early)
sgenie3_early_adj <- cutoff_adjacency(count_matrices = list(early_matrix[[1]]),
                 weighted_adjm_list = sgenie3_early_wadj, 
                 n = 2,
                 method = "GENIE3")
## Matrix 1 Mean 95th Percentile Cutoff: 0.01

sgenie3_early_wadj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 weight adjacency")
sgenie3_early_adj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 adjacency")

Comparison with the Ground Truth

scores <- pscores(adjm, sgenie3_early_adj)

scores$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
plots <- plotg(sgenie3_early_adj)

ajm_compared <- compare_consensus(sgenie3_early_adj, adjm)

GRNBoost2

set.seed(1234)
grnb_early <- infer_networks(early_matrix, method="GRNBoost2")
saveRDS(grnb_early, "./../analysis/grnb_early.RDS")

grnb_early[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 output")

Simmetric Output and ROC

sgrnb_early <- simmetric(list(grnb_early[[1]]), weight_function = "mean")
plotROC(sgrnb_early, adjm, plot_title = "ROC curve - GRNBoost2 Early Integration")

sgrnb_early[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 simmetric output")

Generate Adjacency and Apply Cutoff

sgrnb_early_wadj <- generate_adjacency(sgrnb_early)
sgrnb_early_adj <- cutoff_adjacency(count_matrices = list(early_matrix[[1]]),
                 weighted_adjm_list = sgrnb_early_wadj, 
                 n = 2,
                 method = "GRNBoost2")
## Matrix 1 Mean 95th Percentile Cutoff: 4.84

sgrnb_early_wadj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 weight adjacency")
sgrnb_early_adj[[1]] %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 adjacency")

Comparison with the Ground Truth

scores <- pscores(adjm, sgrnb_early_adj)

scores$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
plots <- plotg(sgrnb_early_adj)

ajm_compared <- compare_consensus(sgrnb_early_adj, adjm)

Joint Integration

Joint Random Forest

#https://cran.r-project.org/src/contrib/Archive/JRF/
#install.packages("/home/francescoc/Downloads/JRF_0.1-4.tar.gz", repos = NULL, type = "source")

library(JRF)
jrf_matrices <- count_matrices
jrf_matrices[[1]] <- t(jrf_matrices[[1]])
jrf_matrices[[2]] <- t(jrf_matrices[[2]])
jrf_matrices[[3]] <- t(jrf_matrices[[3]])

result <- JRF(X=jrf_matrices, genes.name = rownames(jrf_matrices[[1]]), ntree = 500, mtry = round(sqrt(length(rownames(jrf_matrices[[1]]))-1)))

result %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "JRF output")
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=it_IT.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=it_IT.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=it_IT.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=it_IT.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] JRF_0.1-4         pROC_1.18.0       DiagrammeR_1.0.11 gridExtra_2.3    
##  [5] reshape2_1.4.4    rbenchmark_1.0.0  learn2count_0.3.2 reticulate_1.34.0
##  [9] DT_0.22           forcats_0.5.1     stringr_1.4.0     dplyr_1.0.9      
## [13] purrr_0.3.4       readr_2.1.2       tidyr_1.2.0       tibble_3.1.7     
## [17] ggplot2_3.3.6     tidyverse_1.3.1   igraph_2.0.3      doParallel_1.0.17
## [21] iterators_1.0.14  foreach_1.5.2     GENIE3_1.16.0    
## 
## loaded via a namespace (and not attached):
##  [1] bitops_1.0-7                matrixStats_0.62.0         
##  [3] fs_1.5.2                    lubridate_1.8.0            
##  [5] RColorBrewer_1.1-3          httr_1.4.3                 
##  [7] GenomeInfoDb_1.30.1         tools_4.1.0                
##  [9] backports_1.4.1             bslib_0.3.1                
## [11] iZID_0.0.1                  utf8_1.2.2                 
## [13] R6_2.5.1                    DBI_1.1.2                  
## [15] BiocGenerics_0.40.0         colorspace_2.0-3           
## [17] withr_2.5.0                 tidyselect_1.1.2           
## [19] compiler_4.1.0              graph_1.72.0               
## [21] Biobase_2.54.0              cli_3.3.0                  
## [23] rvest_1.0.2                 xml2_1.3.3                 
## [25] DelayedArray_0.20.0         labeling_0.4.2             
## [27] sass_0.4.1                  scales_1.2.0               
## [29] digest_0.6.29               rmarkdown_2.14             
## [31] XVector_0.34.0              pkgconfig_2.0.3            
## [33] htmltools_0.5.2             MatrixGenerics_1.6.0       
## [35] highr_0.9                   dbplyr_2.1.1               
## [37] fastmap_1.1.0               htmlwidgets_1.5.4          
## [39] rlang_1.1.4                 readxl_1.4.0               
## [41] rstudioapi_0.13             farver_2.1.0               
## [43] visNetwork_2.1.2            jquerylib_0.1.4            
## [45] generics_0.1.2              jsonlite_1.8.0             
## [47] crosstalk_1.2.0             RCurl_1.98-1.6             
## [49] magrittr_2.0.3              GenomeInfoDbData_1.2.7     
## [51] Matrix_1.6-1.1              Rcpp_1.0.8.3               
## [53] munsell_0.5.0               S4Vectors_0.32.4           
## [55] fansi_1.0.3                 lifecycle_1.0.1            
## [57] stringi_1.7.6               yaml_2.3.5                 
## [59] distributions3_0.2.2        zlibbioc_1.40.0            
## [61] MASS_7.3-57                 SummarizedExperiment_1.24.0
## [63] plyr_1.8.7                  grid_4.1.0                 
## [65] crayon_1.5.1                lattice_0.20-45            
## [67] haven_2.5.0                 hms_1.1.1                  
## [69] knitr_1.39                  pillar_1.7.0               
## [71] GenomicRanges_1.46.1        codetools_0.2-18           
## [73] stats4_4.1.0                reprex_2.0.1               
## [75] glue_1.6.2                  evaluate_0.15              
## [77] modelr_0.1.8                png_0.1-7                  
## [79] vctrs_0.4.1                 tzdb_0.3.0                 
## [81] cellranger_1.1.0            gtable_0.3.0               
## [83] assertthat_0.2.1            xfun_0.30                  
## [85] broom_0.8.0                 SingleCellExperiment_1.16.0
## [87] IRanges_2.28.0              ellipsis_0.3.2